home *** CD-ROM | disk | FTP | other *** search
- /***********************************************************
-
- Copyright 1995 by Tom Lord
-
- All Rights Reserved
-
- Permission to use, copy, modify, and distribute this software and its
- documentation for any purpose and without fee is hereby granted,
- provided that the above copyright notice appear in all copies and that
- both that copyright notice and this permission notice appear in
- supporting documentation, and that the name of the copyright holder not be
- used in advertising or publicity pertaining to distribution of the
- software without specific, written prior permission.
-
- Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- PERFORMANCE OF THIS SOFTWARE.
-
- ******************************************************************/
-
-
- /*
- * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
- */
-
-
- #include "rxall.h"
- #include "rxsuper.h"
-
-
-
-
- /* The functions in the next several pages define the lazy-NFA-conversion used
- * by matchers. The input to this construction is an NFA such as
- * is built by compactify_nfa (rx.c). The output is the superNFA.
- */
-
- /* Match engines can use arbitrary values for opcodes. So, the parse tree
- * is built using instructions names (enum rx_opcode), but the superstate
- * nfa is populated with mystery opcodes (void *).
- *
- * For convenience, here is an id table. The opcodes are == to their inxs
- *
- * The lables in re_search_2 would make good values for instructions.
- */
-
- void * rx_id_instruction_table[rx_num_instructions] =
- {
- (void *) rx_backtrack_point,
- (void *) rx_do_side_effects,
- (void *) rx_cache_miss,
- (void *) rx_next_char,
- (void *) rx_backtrack,
- (void *) rx_error_inx
- };
-
-
-
-
- /* The partially instantiated superstate graph has a transition
- * table at every node. There is one entry for every character.
- * This fills in the transition for a set.
- */
- #ifdef __STDC__
- static void
- install_transition (struct rx_superstate *super,
- struct rx_inx *answer, rx_Bitset trcset)
- #else
- static void
- install_transition (super, answer, trcset)
- struct rx_superstate *super;
- struct rx_inx *answer;
- rx_Bitset trcset;
- #endif
- {
- struct rx_inx * transitions = super->transitions;
- int chr;
- for (chr = 0; chr < 256; )
- if (!*trcset)
- {
- ++trcset;
- chr += 32;
- }
- else
- {
- RX_subset sub = *trcset;
- RX_subset mask = 1;
- int bound = chr + 32;
- while (chr < bound)
- {
- if (sub & mask)
- transitions [chr] = *answer;
- ++chr;
- mask <<= 1;
- }
- ++trcset;
- }
- }
-
-
- #ifdef __STDC__
- static int
- qlen (struct rx_superstate * q)
- #else
- static int
- qlen (q)
- struct rx_superstate * q;
- #endif
- {
- int count = 1;
- struct rx_superstate * it;
- if (!q)
- return 0;
- for (it = q->next_recyclable; it != q; it = it->next_recyclable)
- ++count;
- return count;
- }
-
- #ifdef __STDC__
- static void
- check_cache (struct rx_cache * cache)
- #else
- static void
- check_cache (cache)
- struct rx_cache * cache;
- #endif
- {
- struct rx_cache * you_fucked_up = 0;
- int total = cache->superstates;
- int semi = cache->semifree_superstates;
- if (semi != qlen (cache->semifree_superstate))
- check_cache (you_fucked_up);
- if ((total - semi) != qlen (cache->lru_superstate))
- check_cache (you_fucked_up);
- }
- #ifdef __STDC__
- char *
- rx_cache_malloc (struct rx_cache * cache, int size)
- #else
- char *
- rx_cache_malloc (cache, size)
- struct rx_cache * cache;
- int size;
- #endif
- {
- char * answer;
- answer = (char *)malloc (size);
- if (answer)
- cache->bytes_used += size;
- return answer;
- }
-
-
- #ifdef __STDC__
- void
- rx_cache_free (struct rx_cache * cache, int size, char * mem)
- #else
- void
- rx_cache_free (cache, size, mem)
- struct rx_cache * cache;
- int size;
- char * mem;
- #endif
- {
- free (mem);
- cache->bytes_used -= size;
- }
-
-
-
- /* When a superstate is old and neglected, it can enter a
- * semi-free state. A semi-free state is slated to die.
- * Incoming transitions to a semi-free state are re-written
- * to cause an (interpreted) fault when they are taken.
- * The fault handler revives the semi-free state, patches
- * incoming transitions back to normal, and continues.
- *
- * The idea is basicly to free in two stages, aborting
- * between the two if the state turns out to be useful again.
- * When a free is aborted, the rescued superstate is placed
- * in the most-favored slot to maximize the time until it
- * is next semi-freed.
- */
-
- #ifdef __STDC__
- static void
- semifree_superstate (struct rx_cache * cache)
- #else
- static void
- semifree_superstate (cache)
- struct rx_cache * cache;
- #endif
- {
- int disqualified = cache->semifree_superstates;
- if (disqualified == cache->superstates)
- return;
- while (cache->lru_superstate->locks)
- {
- cache->lru_superstate = cache->lru_superstate->next_recyclable;
- ++disqualified;
- if (disqualified == cache->superstates)
- return;
- }
- {
- struct rx_superstate * it = cache->lru_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->lru_superstate = (it == it->next_recyclable
- ? 0
- : it->next_recyclable);
- if (!cache->semifree_superstate)
- {
- cache->semifree_superstate = it;
- it->next_recyclable = it;
- it->prev_recyclable = it;
- }
- else
- {
- it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
- it->next_recyclable = cache->semifree_superstate;
- it->prev_recyclable->next_recyclable = it;
- it->next_recyclable->prev_recyclable = it;
- }
- {
- struct rx_distinct_future *df;
- it->is_semifree = 1;
- ++cache->semifree_superstates;
- df = it->transition_refs;
- if (df)
- {
- df->prev_same_dest->next_same_dest = 0;
- for (df = it->transition_refs; df; df = df->next_same_dest)
- {
- df->future_frame.inx = cache->instruction_table[rx_cache_miss];
- df->future_frame.data = 0;
- df->future_frame.data_2 = (void *) df;
- /* If there are any NEXT-CHAR instruction frames that
- * refer to this state, we convert them to CACHE-MISS frames.
- */
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0]
- == df->edge->options))
- install_transition (df->present, &df->future_frame,
- df->edge->cset);
- }
- df = it->transition_refs;
- df->prev_same_dest->next_same_dest = df;
- }
- }
- }
- }
-
-
- #ifdef __STDC__
- static void
- refresh_semifree_superstate (struct rx_cache * cache,
- struct rx_superstate * super)
- #else
- static void
- refresh_semifree_superstate (cache, super)
- struct rx_cache * cache;
- struct rx_superstate * super;
- #endif
- {
- struct rx_distinct_future *df;
-
- if (super->transition_refs)
- {
- super->transition_refs->prev_same_dest->next_same_dest = 0;
- for (df = super->transition_refs; df; df = df->next_same_dest)
- {
- df->future_frame.inx = cache->instruction_table[rx_next_char];
- df->future_frame.data = (void *) super->transitions;
- df->future_frame.data_2 = (void *)(super->contents->is_final);
- /* CACHE-MISS instruction frames that refer to this state,
- * must be converted to NEXT-CHAR frames.
- */
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0]
- == df->edge->options))
- install_transition (df->present, &df->future_frame,
- df->edge->cset);
- }
- super->transition_refs->prev_same_dest->next_same_dest
- = super->transition_refs;
- }
- if (cache->semifree_superstate == super)
- cache->semifree_superstate = (super->prev_recyclable == super
- ? 0
- : super->prev_recyclable);
- super->next_recyclable->prev_recyclable = super->prev_recyclable;
- super->prev_recyclable->next_recyclable = super->next_recyclable;
-
- if (!cache->lru_superstate)
- (cache->lru_superstate
- = super->next_recyclable
- = super->prev_recyclable
- = super);
- else
- {
- super->next_recyclable = cache->lru_superstate;
- super->prev_recyclable = cache->lru_superstate->prev_recyclable;
- super->next_recyclable->prev_recyclable = super;
- super->prev_recyclable->next_recyclable = super;
- }
- super->is_semifree = 0;
- --cache->semifree_superstates;
- }
-
- #ifdef __STDC__
- static void
- rx_refresh_this_superstate (struct rx_cache * cache,
- struct rx_superstate * superstate)
- #else
- static void
- rx_refresh_this_superstate (cache, superstate)
- struct rx_cache * cache;
- struct rx_superstate * superstate;
- #endif
- {
- if (superstate->is_semifree)
- refresh_semifree_superstate (cache, superstate);
- else if (cache->lru_superstate == superstate)
- cache->lru_superstate = superstate->next_recyclable;
- else if (superstate != cache->lru_superstate->prev_recyclable)
- {
- superstate->next_recyclable->prev_recyclable
- = superstate->prev_recyclable;
- superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable;
- superstate->next_recyclable = cache->lru_superstate;
- superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
- superstate->next_recyclable->prev_recyclable = superstate;
- superstate->prev_recyclable->next_recyclable = superstate;
- }
- }
-
- #ifdef __STDC__
- static void
- release_superset_low (struct rx_cache * cache,
- struct rx_superset *set)
- #else
- static void
- release_superset_low (cache, set)
- struct rx_cache * cache;
- struct rx_superset *set;
- #endif
- {
- if (!--set->refs)
- {
- if (set->cdr)
- release_superset_low (cache, set->cdr);
-
- set->starts_for = 0;
-
- rx_hash_free (&set->hash_item, &cache->superset_hash_rules);
- rx_cache_free (cache,
- sizeof (struct rx_superset),
- (char *)set);
- }
- }
-
- #ifdef __STDC__
- void
- rx_release_superset (struct rx *rx,
- struct rx_superset *set)
- #else
- void
- rx_release_superset (rx, set)
- struct rx *rx;
- struct rx_superset *set;
- #endif
- {
- release_superset_low (rx->cache, set);
- }
-
- /* This tries to add a new superstate to the superstate freelist.
- * It might, as a result, free some edge pieces or hash tables.
- * If nothing can be freed because too many locks are being held, fail.
- */
-
- #ifdef __STDC__
- static int
- rx_really_free_superstate (struct rx_cache * cache)
- #else
- static int
- rx_really_free_superstate (cache)
- struct rx_cache * cache;
- #endif
- {
- int locked_superstates = 0;
- struct rx_superstate * it;
-
- if (!cache->superstates)
- return 0;
-
- /* scale these */
- while ((cache->hits + cache->misses) > cache->superstates)
- {
- cache->hits >>= 1;
- cache->misses >>= 1;
- }
-
- /* semifree superstates twice as fast as they are freed
- * so popular states can be rescued.
- */
- semifree_superstate (cache);
- semifree_superstate (cache);
- semifree_superstate (cache);
-
- #if 0
- Redundant because semifree_superstate already
- makes this check;
-
-
- while (cache->semifree_superstate && cache->semifree_superstate->locks)
- {
- refresh_semifree_superstate (cache, cache->semifree_superstate);
- ++locked_superstates;
- if (locked_superstates == cache->superstates)
- return 0;
- }
-
-
- if (cache->semifree_superstate)
- insert the "it =" block which follows this "if 0" code;
- else
- {
- while (cache->lru_superstate->locks)
- {
- cache->lru_superstate = cache->lru_superstate->next_recyclable;
- ++locked_superstates;
- if (locked_superstates == cache->superstates)
- return 0;
- }
- it = cache->lru_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->lru_superstate = ((it == it->next_recyclable)
- ? 0
- : it->next_recyclable);
- }
- #endif
-
- if (!cache->semifree_superstate)
- return 0;
-
- {
- it = cache->semifree_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->semifree_superstate = ((it == it->next_recyclable)
- ? 0
- : it->next_recyclable);
- --cache->semifree_superstates;
- }
-
-
- if (it->transition_refs)
- {
- struct rx_distinct_future *df;
- for (df = it->transition_refs,
- df->prev_same_dest->next_same_dest = 0;
- df;
- df = df->next_same_dest)
- {
- df->future_frame.inx = cache->instruction_table[rx_cache_miss];
- df->future_frame.data = 0;
- df->future_frame.data_2 = (void *) df;
- df->future = 0;
- }
- it->transition_refs->prev_same_dest->next_same_dest =
- it->transition_refs;
- }
- {
- struct rx_super_edge *tc = it->edges;
- while (tc)
- {
- struct rx_distinct_future * df;
- struct rx_super_edge *tct = tc->next;
- df = tc->options;
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- while (df)
- {
- struct rx_distinct_future *dft = df;
- df = df->next_same_super_edge[0];
-
-
- if (dft->future && dft->future->transition_refs == dft)
- {
- dft->future->transition_refs = dft->next_same_dest;
- if (dft->future->transition_refs == dft)
- dft->future->transition_refs = 0;
- }
- dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
- dft->prev_same_dest->next_same_dest = dft->next_same_dest;
- rx_cache_free (cache,
- sizeof (struct rx_distinct_future),
- (char *)dft);
- }
- rx_cache_free (cache,
- sizeof (struct rx_super_edge),
- (char *)tc);
- tc = tct;
- }
- }
-
- if (it->contents->superstate == it)
- it->contents->superstate = 0;
- release_superset_low (cache, it->contents);
- rx_cache_free (cache,
- (sizeof (struct rx_superstate)
- + cache->local_cset_size * sizeof (struct rx_inx)),
- (char *)it);
- --cache->superstates;
- return 1;
- }
-
-
- #ifdef __STDC__
- static char *
- rx_cache_malloc_or_get (struct rx_cache * cache, int size)
- #else
- static char *
- rx_cache_malloc_or_get (cache, size)
- struct rx_cache * cache;
- int size;
- #endif
- {
- while ( (cache->bytes_used + size > cache->bytes_allowed)
- && rx_really_free_superstate (cache))
- ;
-
- return rx_cache_malloc (cache, size);
- }
-
-
-
- #ifdef __STDC__
- static int
- supersetcmp (void * va, void * vb)
- #else
- static int
- supersetcmp (va, vb)
- void * va;
- void * vb;
- #endif
- {
- struct rx_superset * a = (struct rx_superset *)va;
- struct rx_superset * b = (struct rx_superset *)vb;
- return ( (a == b)
- || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr)));
- }
-
- #ifdef __STDC__
- static struct rx_hash_item *
- superset_allocator (struct rx_hash_rules * rules, void * val)
- #else
- static struct rx_hash_item *
- superset_allocator (rules, val)
- struct rx_hash_rules * rules;
- void * val;
- #endif
- {
- struct rx_cache * cache;
- struct rx_superset * template;
- struct rx_superset * newset;
-
- cache = ((struct rx_cache *)
- ((char *)rules
- - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
- template = (struct rx_superset *)val;
- newset = ((struct rx_superset *)
- rx_cache_malloc (cache, sizeof (*template)));
- if (!newset)
- return 0;
- newset->refs = 0;
- newset->car = template->car;
- newset->id = template->id;
- newset->cdr = template->cdr;
- newset->is_final = ( template->car->is_final
- || ( template->cdr
- && template->cdr->is_final));
- newset->superstate = 0;
- rx_protect_superset (rx, template->cdr);
- newset->hash_item.data = (void *)newset;
- newset->hash_item.binding = 0;
- return &newset->hash_item;
- }
-
- #ifdef __STDC__
- static struct rx_hash *
- super_hash_allocator (struct rx_hash_rules * rules)
- #else
- static struct rx_hash *
- super_hash_allocator (rules)
- struct rx_hash_rules * rules;
- #endif
- {
- struct rx_cache * cache;
- cache = ((struct rx_cache *)
- ((char *)rules
- - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
- return ((struct rx_hash *)
- rx_cache_malloc (cache, sizeof (struct rx_hash)));
- }
-
-
- #ifdef __STDC__
- static void
- super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
- #else
- static void
- super_hash_liberator (hash, rules)
- struct rx_hash * hash;
- struct rx_hash_rules * rules;
- #endif
- {
- struct rx_cache * cache
- = ((struct rx_cache *)
- (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
- rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash);
- }
-
- #ifdef __STDC__
- static void
- superset_hash_item_liberator (struct rx_hash_item * it,
- struct rx_hash_rules * rules)
- #else
- static void
- superset_hash_item_liberator (it, rules)
- struct rx_hash_item * it;
- struct rx_hash_rules * rules;
- #endif
- {
- }
-
- int rx_cache_bound = 3;
- static int rx_default_cache_got = 0;
-
- static struct rx_cache default_cache =
- {
- {
- supersetcmp,
- super_hash_allocator,
- super_hash_liberator,
- superset_allocator,
- superset_hash_item_liberator,
- },
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 524288,
- 0,
- 256,
- rx_id_instruction_table,
- {
- 0,
- 0,
- 0,
- 0,
- 0
- }
- };
- struct rx_cache * rx_default_cache = &default_cache;
-
- /* This adds an element to a superstate set. These sets are lists, such
- * that lists with == elements are ==. The empty set is returned by
- * superset_cons (rx, 0, 0) and is NOT equivelent to
- * (struct rx_superset)0.
- */
-
- #ifdef __STDC__
- struct rx_superset *
- rx_superset_cons (struct rx * rx,
- struct rx_nfa_state *car, struct rx_superset *cdr)
- #else
- struct rx_superset *
- rx_superset_cons (rx, car, cdr)
- struct rx * rx;
- struct rx_nfa_state *car;
- struct rx_superset *cdr;
- #endif
- {
- struct rx_cache * cache = rx->cache;
- if (!car && !cdr)
- {
- if (!cache->empty_superset)
- {
- cache->empty_superset
- = ((struct rx_superset *)
- rx_cache_malloc (cache, sizeof (struct rx_superset)));
- if (!cache->empty_superset)
- return 0;
- bzero (cache->empty_superset, sizeof (struct rx_superset));
- cache->empty_superset->refs = 1000;
- }
- return cache->empty_superset;
- }
- {
- struct rx_superset template;
- struct rx_hash_item * hit;
- template.car = car;
- template.cdr = cdr;
- template.id = rx->rx_id;
- hit = rx_hash_store (&cache->superset_table,
- (unsigned long)car ^ car->id ^ (unsigned long)cdr,
- (void *)&template,
- &cache->superset_hash_rules);
- return (hit
- ? (struct rx_superset *)hit->data
- : 0);
- }
- }
-
- /* This computes a union of two NFA state sets. The sets do not have the
- * same representation though. One is a RX_SUPERSET structure (part
- * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
- */
-
- #ifdef __STDC__
- struct rx_superset *
- rx_superstate_eclosure_union
- (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
- #else
- struct rx_superset *
- rx_superstate_eclosure_union (rx, set, ecl)
- struct rx * rx;
- struct rx_superset *set;
- struct rx_nfa_state_set *ecl;
- #endif
- {
- if (!ecl)
- return set;
-
- if (!set->car)
- return rx_superset_cons (rx, ecl->car,
- rx_superstate_eclosure_union (rx, set, ecl->cdr));
- if (set->car == ecl->car)
- return rx_superstate_eclosure_union (rx, set, ecl->cdr);
-
- {
- struct rx_superset * tail;
- struct rx_nfa_state * first;
-
- if (set->car > ecl->car)
- {
- tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
- first = set->car;
- }
- else
- {
- tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
- first = ecl->car;
- }
- if (!tail)
- return 0;
- else
- {
- struct rx_superset * answer;
- answer = rx_superset_cons (rx, first, tail);
- if (!answer)
- {
- rx_protect_superset (rx, tail);
- rx_release_superset (rx, tail);
- return 0;
- }
- else
- return answer;
- }
- }
- }
-
-
-
-
- /*
- * This makes sure that a list of rx_distinct_futures contains
- * a future for each possible set of side effects in the eclosure
- * of a given state. This is some of the work of filling in a
- * superstate transition.
- */
-
- #ifdef __STDC__
- static struct rx_distinct_future *
- include_futures (struct rx *rx,
- struct rx_distinct_future *df, struct rx_nfa_state
- *state, struct rx_superstate *superstate)
- #else
- static struct rx_distinct_future *
- include_futures (rx, df, state, superstate)
- struct rx *rx;
- struct rx_distinct_future *df;
- struct rx_nfa_state *state;
- struct rx_superstate *superstate;
- #endif
- {
- struct rx_possible_future *future;
- struct rx_cache * cache = rx->cache;
- for (future = rx_state_possible_futures (rx, state); future; future = future->next)
- {
- struct rx_distinct_future *dfp;
- struct rx_distinct_future *insert_before = 0;
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
- if (dfp->effects == future->effects)
- break;
- else
- {
- int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
- if (order > 0)
- {
- insert_before = dfp;
- dfp = 0;
- break;
- }
- }
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = df;
- if (!dfp)
- {
- dfp
- = ((struct rx_distinct_future *)
- rx_cache_malloc (cache,
- sizeof (struct rx_distinct_future)));
- if (!dfp)
- return 0;
- if (!df)
- {
- df = insert_before = dfp;
- df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
- }
- else if (!insert_before)
- insert_before = df;
- else if (insert_before == df)
- df = dfp;
-
- dfp->next_same_super_edge[0] = insert_before;
- dfp->next_same_super_edge[1]
- = insert_before->next_same_super_edge[1];
- dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
- dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
- dfp->next_same_dest = dfp->prev_same_dest = dfp;
- dfp->future = 0;
- dfp->present = superstate;
- dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
- dfp->future_frame.data = 0;
- dfp->future_frame.data_2 = (void *) dfp;
- dfp->side_effects_frame.inx
- = rx->instruction_table[rx_do_side_effects];
- dfp->side_effects_frame.data = 0;
- dfp->side_effects_frame.data_2 = (void *) dfp;
- dfp->effects = future->effects;
- }
- }
- return df;
- }
-
-
-
- /* This constructs a new superstate from its state set. The only
- * complexity here is memory management.
- */
- #ifdef __STDC__
- struct rx_superstate *
- rx_superstate (struct rx *rx,
- struct rx_superset *set)
- #else
- struct rx_superstate *
- rx_superstate (rx, set)
- struct rx *rx;
- struct rx_superset *set;
- #endif
- {
- struct rx_cache * cache = rx->cache;
- struct rx_superstate * superstate = 0;
-
- /* Does the superstate already exist in the cache? */
- if (set->superstate)
- {
- if (set->superstate->rx_id != rx->rx_id)
- {
- /* Aha. It is in the cache, but belongs to a superstate
- * that refers to an NFA that no longer exists.
- * (We know it no longer exists because it was evidently
- * stored in the same region of memory as the current nfa
- * yet it has a different id.)
- */
- superstate = set->superstate;
- if (!superstate->is_semifree)
- {
- if (cache->lru_superstate == superstate)
- {
- cache->lru_superstate = superstate->next_recyclable;
- if (cache->lru_superstate == superstate)
- cache->lru_superstate = 0;
- }
- {
- superstate->next_recyclable->prev_recyclable
- = superstate->prev_recyclable;
- superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable;
- if (!cache->semifree_superstate)
- {
- (cache->semifree_superstate
- = superstate->next_recyclable
- = superstate->prev_recyclable
- = superstate);
- }
- else
- {
- superstate->next_recyclable = cache->semifree_superstate;
- superstate->prev_recyclable
- = cache->semifree_superstate->prev_recyclable;
- superstate->next_recyclable->prev_recyclable
- = superstate;
- superstate->prev_recyclable->next_recyclable
- = superstate;
- cache->semifree_superstate = superstate;
- }
- ++cache->semifree_superstates;
- }
- }
- set->superstate = 0;
- goto handle_cache_miss;
- }
- ++cache->hits;
- superstate = set->superstate;
-
- rx_refresh_this_superstate (cache, superstate);
- return superstate;
- }
-
- handle_cache_miss:
-
- /* This point reached only for cache misses. */
- ++cache->misses;
- #if RX_DEBUG
- if (rx_debug_trace > 1)
- {
- struct rx_superset * setp = set;
- fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
- while (setp)
- {
- fprintf (stderr, "%d ", setp->id);
- setp = setp->cdr;
- }
- fprintf (stderr, "(%d)\n", set);
- }
- #endif
-
- {
- int superstate_size;
- superstate_size = ( sizeof (*superstate)
- + ( sizeof (struct rx_inx)
- * rx->local_cset_size));
- superstate = ((struct rx_superstate *)
- rx_cache_malloc_or_get (cache, superstate_size));
- ++cache->superstates;
- }
-
- if (!superstate)
- return 0;
-
- if (!cache->lru_superstate)
- (cache->lru_superstate
- = superstate->next_recyclable
- = superstate->prev_recyclable
- = superstate);
- else
- {
- superstate->next_recyclable = cache->lru_superstate;
- superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
- ( superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable->prev_recyclable
- = superstate);
- }
- superstate->rx_id = rx->rx_id;
- superstate->transition_refs = 0;
- superstate->locks = 0;
- superstate->is_semifree = 0;
- set->superstate = superstate;
- superstate->contents = set;
- rx_protect_superset (rx, set);
- superstate->edges = 0;
- {
- int x;
- /* None of the transitions from this superstate are known yet. */
- for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
- {
- struct rx_inx * ifr = &superstate->transitions[x];
- ifr->inx = rx->instruction_table [rx_cache_miss];
- ifr->data = ifr->data_2 = 0;
- }
- }
- return superstate;
- }
-
-
- /* This computes the destination set of one edge of the superstate NFA.
- * Note that a RX_DISTINCT_FUTURE is a superstate edge.
- * Returns 0 on an allocation failure.
- */
-
- #ifdef __STDC__
- static int
- solve_destination (struct rx *rx, struct rx_distinct_future *df)
- #else
- static int
- solve_destination (rx, df)
- struct rx *rx;
- struct rx_distinct_future *df;
- #endif
- {
- struct rx_super_edge *tc = df->edge;
- struct rx_superset *nfa_state;
- struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
- struct rx_superset *solution = nil_set;
- struct rx_superstate *dest;
-
- rx_protect_superset (rx, solution);
- /* Iterate over all NFA states in the state set of this superstate. */
- for (nfa_state = df->present->contents;
- nfa_state->car;
- nfa_state = nfa_state->cdr)
- {
- struct rx_nfa_edge *e;
- /* Iterate over all edges of each NFA state. */
- for (e = nfa_state->car->edges; e; e = e->next)
- /* If we find an edge that is labeled with
- * the characters we are solving for.....
- */
- if ((e->type == ne_cset)
- && rx_bitset_is_subset (rx->local_cset_size,
- tc->cset,
- e->params.cset))
- {
- struct rx_nfa_state *n = e->dest;
- struct rx_possible_future *pf;
- /* ....search the partial epsilon closures of the destination
- * of that edge for a path that involves the same set of
- * side effects we are solving for.
- * If we find such a RX_POSSIBLE_FUTURE, we add members to the
- * stateset we are computing.
- */
- for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next)
- if (pf->effects == df->effects)
- {
- struct rx_superset * old_sol;
- old_sol = solution;
- solution = rx_superstate_eclosure_union (rx, solution,
- pf->destset);
- if (!solution)
- return 0;
- rx_protect_superset (rx, solution);
- rx_release_superset (rx, old_sol);
- }
- }
- }
- /* It is possible that the RX_DISTINCT_FUTURE we are working on has
- * the empty set of NFA states as its definition. In that case, this
- * is a failure point.
- */
- if (solution == nil_set)
- {
- df->future_frame.inx = (void *) rx_backtrack;
- df->future_frame.data = 0;
- df->future_frame.data_2 = 0;
- return 1;
- }
- dest = rx_superstate (rx, solution);
- rx_release_superset (rx, solution);
- if (!dest)
- return 0;
-
- {
- struct rx_distinct_future *dft;
- dft = df;
- df->prev_same_dest->next_same_dest = 0;
- while (dft)
- {
- dft->future = dest;
- dft->future_frame.inx = rx->instruction_table[rx_next_char];
- dft->future_frame.data = (void *) dest->transitions;
- dft->future_frame.data_2 = (void *) dest->contents->is_final;
- dft = dft->next_same_dest;
- }
- df->prev_same_dest->next_same_dest = df;
- }
- if (!dest->transition_refs)
- dest->transition_refs = df;
- else
- {
- struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
- dest->transition_refs->next_same_dest = df->next_same_dest;
- df->next_same_dest->prev_same_dest = dest->transition_refs;
- df->next_same_dest = dft;
- dft->prev_same_dest = df;
- }
- return 1;
- }
-
-
- /* This takes a superstate and a character, and computes some edges
- * from the superstate NFA. In particular, this computes all edges
- * that lead from SUPERSTATE given CHR. This function also
- * computes the set of characters that share this edge set.
- * This returns 0 on allocation error.
- * The character set and list of edges are returned through
- * the paramters CSETOUT and DFOUT.
- } */
-
- #ifdef __STDC__
- static int
- compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
- rx_Bitset csetout, struct rx_superstate *superstate,
- unsigned char chr)
- #else
- static int
- compute_super_edge (rx, dfout, csetout, superstate, chr)
- struct rx *rx;
- struct rx_distinct_future **dfout;
- rx_Bitset csetout;
- struct rx_superstate *superstate;
- unsigned char chr;
- #endif
- {
- struct rx_superset *stateset = superstate->contents;
-
- /* To compute the set of characters that share edges with CHR,
- * we start with the full character set, and subtract.
- */
- rx_bitset_universe (rx->local_cset_size, csetout);
- *dfout = 0;
-
- /* Iterate over the NFA states in the superstate state-set. */
- while (stateset->car)
- {
- struct rx_nfa_edge *e;
- for (e = stateset->car->edges; e; e = e->next)
- if (e->type == ne_cset)
- {
- if (!RX_bitset_member (e->params.cset, chr))
- /* An edge that doesn't apply at least tells us some characters
- * that don't share the same edge set as CHR.
- */
- rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
- else
- {
- /* If we find an NFA edge that applies, we make sure there
- * are corresponding edges in the superstate NFA.
- */
- {
- struct rx_distinct_future * saved;
- saved = *dfout;
- *dfout = include_futures (rx, *dfout, e->dest, superstate);
- if (!*dfout)
- {
- struct rx_distinct_future * df;
- df = saved;
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- while (df)
- {
- struct rx_distinct_future *dft;
- dft = df;
- df = df->next_same_super_edge[0];
-
- if (dft->future && dft->future->transition_refs == dft)
- {
- dft->future->transition_refs = dft->next_same_dest;
- if (dft->future->transition_refs == dft)
- dft->future->transition_refs = 0;
- }
- dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
- dft->prev_same_dest->next_same_dest = dft->next_same_dest;
- rx_cache_free (rx->cache, sizeof (*dft), (char *)dft);
- }
- return 0;
- }
- }
- /* We also trim the character set a bit. */
- rx_bitset_intersection (rx->local_cset_size,
- csetout, e->params.cset);
- }
- }
- stateset = stateset->cdr;
- }
- return 1;
- }
-
-
- /* This is a constructor for RX_SUPER_EDGE structures. These are
- * wrappers for lists of superstate NFA edges that share character sets labels.
- * If a transition class contains more than one rx_distinct_future (superstate
- * edge), then it represents a non-determinism in the superstate NFA.
- */
-
- #ifdef __STDC__
- static struct rx_super_edge *
- rx_super_edge (struct rx *rx,
- struct rx_superstate *super, rx_Bitset cset,
- struct rx_distinct_future *df)
- #else
- static struct rx_super_edge *
- rx_super_edge (rx, super, cset, df)
- struct rx *rx;
- struct rx_superstate *super;
- rx_Bitset cset;
- struct rx_distinct_future *df;
- #endif
- {
- struct rx_super_edge *tc;
- int tc_size;
-
- tc_size = ( sizeof (struct rx_super_edge)
- + rx_sizeof_bitset (rx->local_cset_size));
-
- tc = ((struct rx_super_edge *)
- rx_cache_malloc (rx->cache, tc_size));
-
- if (!tc)
- return 0;
-
- tc->next = super->edges;
- super->edges = tc;
- tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
- tc->rx_backtrack_frame.data = 0;
- tc->rx_backtrack_frame.data_2 = (void *) tc;
- tc->options = df;
- tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
- rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
- if (df)
- {
- struct rx_distinct_future * dfp = df;
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- while (dfp)
- {
- dfp->edge = tc;
- dfp = dfp->next_same_super_edge[0];
- }
- df->next_same_super_edge[1]->next_same_super_edge[0] = df;
- }
- return tc;
- }
-
-
- /* There are three kinds of cache miss. The first occurs when a
- * transition is taken that has never been computed during the
- * lifetime of the source superstate. That cache miss is handled by
- * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
- * occurs when the destination superstate of a transition doesn't
- * exist. SOLVE_DESTINATION is used to construct the destination superstate.
- * Finally, the third kind of cache miss occurs when the destination
- * superstate of a transition is in a `semi-free state'. That case is
- * handled by UNFREE_SUPERSTATE.
- *
- * The function of HANDLE_CACHE_MISS is to figure out which of these
- * cases applies.
- */
-
- #ifdef __STDC__
- static void
- install_partial_transition (struct rx_superstate *super,
- struct rx_inx *answer,
- RX_subset set, int offset)
- #else
- static void
- install_partial_transition (super, answer, set, offset)
- struct rx_superstate *super;
- struct rx_inx *answer;
- RX_subset set;
- int offset;
- #endif
- {
- int start = offset;
- int end = start + 32;
- RX_subset pos = 1;
- struct rx_inx * transitions = super->transitions;
-
- while (start < end)
- {
- if (set & pos)
- transitions[start] = *answer;
- pos <<= 1;
- ++start;
- }
- }
-
-
- #ifdef __STDC__
- struct rx_inx *
- rx_handle_cache_miss
- (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
- #else
- struct rx_inx *
- rx_handle_cache_miss (rx, super, chr, data)
- struct rx *rx;
- struct rx_superstate *super;
- unsigned char chr;
- void *data;
- #endif
- {
- int offset = chr / RX_subset_bits;
- struct rx_distinct_future *df = data;
-
- if (!df) /* must be the shared_cache_miss_frame */
- {
- /* Perhaps this is just a transition waiting to be filled. */
- struct rx_super_edge *tc;
- RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
-
- for (tc = super->edges; tc; tc = tc->next)
- if (tc->cset[offset] & mask)
- {
- struct rx_inx * answer;
- df = tc->options;
- answer = ((tc->options->next_same_super_edge[0] != tc->options)
- ? &tc->rx_backtrack_frame
- : (df->effects
- ? &df->side_effects_frame
- : &df->future_frame));
- install_partial_transition (super, answer,
- tc->cset [offset], offset * 32);
- return answer;
- }
- /* Otherwise, it's a flushed or newly encountered edge. */
- {
- char cset_space[1024]; /* this limit is far from unreasonable */
- rx_Bitset trcset;
- struct rx_inx *answer;
-
- if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
- return 0; /* If the arbitrary limit is hit, always fail */
- /* cleanly. */
- trcset = (rx_Bitset)cset_space;
- rx_lock_superstate (rx, super);
- if (!compute_super_edge (rx, &df, trcset, super, chr))
- {
- rx_unlock_superstate (rx, super);
- return 0;
- }
- if (!df) /* We just computed the fail transition. */
- {
- static struct rx_inx
- shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
- answer = &shared_fail_frame;
- }
- else
- {
- tc = rx_super_edge (rx, super, trcset, df);
- if (!tc)
- {
- rx_unlock_superstate (rx, super);
- return 0;
- }
- answer = ((tc->options->next_same_super_edge[0] != tc->options)
- ? &tc->rx_backtrack_frame
- : (df->effects
- ? &df->side_effects_frame
- : &df->future_frame));
- }
- install_partial_transition (super, answer,
- trcset[offset], offset * 32);
- rx_unlock_superstate (rx, super);
- return answer;
- }
- }
- else if (df->future) /* A cache miss on an edge with a future? Must be
- * a semi-free destination. */
- {
- if (df->future->is_semifree)
- refresh_semifree_superstate (rx->cache, df->future);
- return &df->future_frame;
- }
- else
- /* no future superstate on an existing edge */
- {
- rx_lock_superstate (rx, super);
- if (!solve_destination (rx, df))
- {
- rx_unlock_superstate (rx, super);
- return 0;
- }
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0] == df->edge->options))
- install_partial_transition (super, &df->future_frame,
- df->edge->cset[offset], offset * 32);
- rx_unlock_superstate (rx, super);
- return &df->future_frame;
- }
- }
-
-
-